This notebook was prepared with the following environmental settings.
# Read the three Donald Trump text files; each file is flattened into one
# space-separated string.
speech1 <- paste(
  readLines("../data/fulltext/SpeechDonaldTrump-NA.txt", n = -1, skipNul = TRUE),
  collapse = " "
)
speech2 <- paste(
  readLines("../data/fulltext/SpeechDonaldTrump-NA2.txt", n = -1, skipNul = TRUE),
  collapse = " "
)
speech3 <- paste(
  readLines("../data/fulltext/PressDonaldTrump-NA.txt", n = -1, skipNul = TRUE),
  collapse = " "
)

# One metadata row per text. Length-one values are recycled by data.frame()
# to the length of the three-element columns.
Trump.speeches <- data.frame(
  President = "Donald J. Trump",
  File = "DonaldJTrump",
  Term = 0,
  Party = "Republican",
  Date = c("August 31, 2016", "September 7, 2016", "January 11, 2017"),
  Words = c(word_count(speech1), word_count(speech2), word_count(speech3)),
  Win = "yes",
  type = "speeches",
  links = NA,
  urls = NA,
  fulltext = c(speech1, speech2, speech3)
)

# Append the new rows to the existing collection of speeches.
speech.list <- rbind(speech.list, Trump.speeches)
First, we look at nomination acceptance speeches at the major parties’ national conventions. For relevance to Trump’s speeches, we limit our attention to speeches for the first terms of former U.S. presidents. We notice that a number of presidents have very short sentences in their nomination acceptance speeches.
# Count inaugural speeches per File; a count above one identifies a president
# with more than one inaugural speech.
count_term <- table(speech.list[speech.list$type == "inaug", ]$File)
multi.term.sel <- names(which(count_term > 1))

# For each such president, write a beeswarm plot comparing sentence lengths
# across terms to ../output/<File>compare12.png.
# seq_along() (rather than 1:length()) skips the loop entirely when no
# president qualifies, instead of iterating over c(1, 0).
for (i in seq_along(multi.term.sel)) {
  png(paste0("../output/", multi.term.sel[i], "compare12.png"),
      width = 300, height = 300)

  # Sentences from this president's inaugural speeches only.
  sentence.list.sel <- filter(sentence.list,
                              type == "inaug", File %in% multi.term.sel[i])

  # Order the terms by mean sentence length so the plot rows are sorted.
  sentence.list.sel$Term <- factor(sentence.list.sel$Term)
  sentence.list.sel$termOrdered <- reorder(sentence.list.sel$Term,
                                           sentence.list.sel$word.count,
                                           mean,
                                           order = TRUE)

  beeswarm(word.count ~ termOrdered,
           data = sentence.list.sel,
           horizontal = TRUE,
           pch = 16, col = alpha(brewer.pal(9, "Set1"), 0.6),
           cex = 0.55, cex.axis = 0.8, cex.lab = 0.8,
           spacing = 3 / nlevels(sentence.list.sel$termOrdered),
           las = 2, xlab = "Number of words in a sentence.", ylab = "",
           main = "Inauguration speeches of multi-term")

  # Close the PNG device so the file is flushed to disk.
  dev.off()
}
What are these short sentences?
# Ten randomly drawn sentences of at most three words from the nomination
# speech rows filed under "DonaldJTrump".
sentence.list %>%
  filter(File == "DonaldJTrump" & type == "nomin" & word.count <= 3) %>%
  select(sentences) %>%
  sample_n(10)

# The same random draw for the rows filed under "AlbertGore,Jr".
sentence.list %>%
  filter(File == "AlbertGore,Jr" & type == "nomin" & word.count <= 3) %>%
  select(sentences) %>%
  sample_n(10)
# All sentences of at most three words in Hillary Clinton's nomination speech.
# The File key is "HillaryClinton", as used by the other lookups in this
# notebook; the previous value "Clinton" matched neither "HillaryClinton" nor
# "WilliamJClinton", so the exact-equality filter could only return zero rows.
sentence.list %>%
  filter(File == "HillaryClinton",
         type == "nomin",
         word.count <= 3) %>%
  select(sentences)
# All sentences of at most three words in the first-term nomination speech
# filed under "WilliamJClinton".
sentence.list %>%
  filter(File == "WilliamJClinton" & type == "nomin" & Term == 1 &
           word.count <= 3) %>%
  select(sentences)
We notice that the sentences in inaugural speeches are longer than those in nomination acceptance speeches.
# First-term inaugural sentences for the Files listed in sel.comparison
# (sel.comparison is defined outside this section).
sentence.list.sel <- filter(sentence.list,
                            type == "inaug",
                            File %in% sel.comparison,
                            Term == 1)

# Re-factor File to drop unused levels, then order the levels by mean
# sentence length so the plot rows come out sorted.
sentence.list.sel$File <- factor(sentence.list.sel$File)
sentence.list.sel$FileOrdered <- with(
  sentence.list.sel,
  reorder(File, word.count, FUN = mean, order = TRUE)
)

# Wide left margin to leave room for the horizontal File labels.
par(mar = c(4, 11, 2, 2))
beeswarm(
  word.count ~ FileOrdered,
  data = sentence.list.sel,
  horizontal = TRUE,
  pch = 16,
  col = alpha(brewer.pal(9, "Set1"), 0.6),
  cex = 0.55,
  cex.axis = 0.8,
  cex.lab = 0.8,
  spacing = 5 / nlevels(sentence.list.sel$FileOrdered),
  las = 2,
  ylab = "",
  xlab = "Number of words in a sentence.",
  main = "Inaugural Speeches"
)
Short sentences in inaugural speeches.
# All sentences of at most three words in the inaugural speech rows filed
# under "BarackObama".
sentence.list %>%
  filter(File == "BarackObama" & type == "inaug" & word.count <= 3) %>%
  select(sentences)
# Tokenise the full text of every "DonaldJTrump" row into two-word sequences.
trump_bigrams <- unnest_tokens(
  speech.list[speech.list$File == "DonaldJTrump", ],
  bigram, fulltext,
  token = "ngrams", n = 2
)

# Split each bigram into its two words so each word can be tested separately.
bigrams_separated <- separate(trump_bigrams, bigram, c("word1", "word2"),
                              sep = " ")

# Keep only bigrams in which neither word is a stop word.
bigrams_filtered <- filter(bigrams_separated,
                           !word1 %in% stop_words$word,
                           !word2 %in% stop_words$word)

# Frequency of each (word1, word2) pair, most frequent first.
bigram_counts <- count(bigrams_filtered, word1, word2, sort = TRUE)

# Re-join the words, count bigrams within each speech type, and keep the top
# five bigrams per type. The factor levels are fixed while the rows are sorted
# by descending count, so the levels follow that order reversed.
bigrams_united <- bigrams_filtered %>%
  unite(bigram, word1, word2, sep = " ") %>%
  count(type, bigram) %>%
  arrange(desc(n)) %>%
  mutate(bigram = factor(bigram, levels = rev(unique(bigram)))) %>%
  group_by(type) %>%
  arrange(desc(n)) %>%
  top_n(5) %>%
  ungroup()
Selecting by n
# Horizontal bar chart of the top bigrams, one facet per speech type, each
# facet with its own axis scales.
ggplot(data = bigrams_united,
       mapping = aes(x = bigram, y = n, fill = type)) +
  geom_bar(stat = "identity", show.legend = FALSE) +
  facet_wrap(~type, ncol = 2, scales = "free") +
  coord_flip() +
  labs(x = NULL, y = "count of words")
##################
Next, we look at how our presidents (or candidates) alternate between long and short sentences and how they shift between different sentiments in their speeches. It is interesting to note that some presidential candidates’ speeches are more colorful than others. Here we use the same color theme as in the movie “Inside Out.”
image
# Four stacked panels with tight margins, no box, no axes, plain-face titles.
par(
  mfrow = c(4, 1),
  mar = c(1, 0, 2, 0),
  bty = "n",
  xaxt = "n",
  yaxt = "n",
  font.main = 1
)

# One panel per candidate's first-term nomination speech. f.plotsent.len is
# defined outside this section; presumably it draws the sentence-length
# sequence coloured by sentiment -- confirm against its definition.
f.plotsent.len(
  In.list = sentence.list,
  InFile = "HillaryClinton", InType = "nomin", InTerm = 1,
  President = "Hillary Clinton"
)
f.plotsent.len(
  In.list = sentence.list,
  InFile = "DonaldJTrump", InType = "nomin", InTerm = 1,
  President = "Donald Trump"
)
f.plotsent.len(
  In.list = sentence.list,
  InFile = "BarackObama", InType = "nomin", InTerm = 1,
  President = "Barack Obama"
)
f.plotsent.len(
  In.list = sentence.list,
  InFile = "GeorgeWBush", InType = "nomin", InTerm = 1,
  President = "George W. Bush"
)
# Label the result that follows with the speaker's name.
print("Hillary Clinton")
[1] "Hillary Clinton"
# Sentences of at least four words from the nomination speech rows filed
# under "HillaryClinton", with their emotion columns (anger through trust).
speech.df <- sentence.list %>%
  tbl_df() %>%
  filter(File == "HillaryClinton", type == "nomin", word.count >= 4) %>%
  select(sentences, anger:trust) %>%
  as.data.frame()

# For every emotion column, show the sentence holding the maximum value
# (which.max picks the first one on ties).
as.character(
  speech.df[["sentences"]][apply(speech.df[, -1], MARGIN = 2, FUN = which.max)]
)
[1] "Some of you are frustrated, even furious."
[2] "It's a big deal."
[3] "Powerful forces are threatening to pull us apart."
[4] "Powerful forces are threatening to pull us apart."
[5] "It's a big deal."
[6] "My mother, Dorothy, was abandoned by her parents as a young girl."
[7] "It's a big deal."
[8] "Bonds of trust and respect are fraying."
# Label the result that follows with the speaker's name.
print("Barack Obama")
[1] "Barack Obama"
# Sentences of at least five words from the first-term nomination speech rows
# filed under "BarackObama", with their emotion columns (anger through trust).
speech.df <- sentence.list %>%
  tbl_df() %>%
  filter(File == "BarackObama", type == "nomin", Term == 1,
         word.count >= 5) %>%
  select(sentences, anger:trust) %>%
  as.data.frame()

# For every emotion column, show the sentence holding the maximum value
# (which.max picks the first one on ties).
as.character(
  speech.df[["sentences"]][apply(speech.df[, -1], MARGIN = 2, FUN = which.max)]
)
[1] "They could've heard words of anger and discord."
[2] "And that's to be expected."
[3] "It's not because John McCain doesn't care."
[4] "Now let there be no doubt."
[5] "That promise is our greatest inheritance."
[6] "Now let there be no doubt."
[7] "That's not the judgment we need."
[8] "That promise is our greatest inheritance."
# Label the result that follows with the speaker's name.
print("George W Bush")
[1] "George W Bush"
# Sentences of at least four words from the first-term nomination speech rows
# filed under "GeorgeWBush", with their emotion columns (anger through trust).
speech.df <- sentence.list %>%
  tbl_df() %>%
  filter(File == "GeorgeWBush", type == "nomin", Term == 1,
         word.count >= 4) %>%
  select(sentences, anger:trust) %>%
  as.data.frame()

# For every emotion column, show the sentence holding the maximum value
# (which.max picks the first one on ties).
as.character(
  speech.df[["sentences"]][apply(speech.df[, -1], MARGIN = 2, FUN = which.max)]
)
[1] "On the other side of that wall are poverty and prison, addiction and despair."
[2] "We're proud of you."
[3] "But they've got it backwards."
[4] "And at the earliest possible date, my administration will deploy missile defenses to guard against attack and blackmail."
[5] "I appreciate his friendship."
[6] "On the other side of that wall are poverty and prison, addiction and despair."
[7] "They had their chance."
[8] "Corporations are responsible to treat their workers fairly and to leave the air and waters clean."
# Label the result that follows with the speaker's name.
print("Donald Trump")
[1] "Donald Trump"
# Sentences of at least five words from the first-term nomination speech rows
# filed under "DonaldJTrump", with their emotion columns (anger through trust).
speech.df <- sentence.list %>%
  tbl_df() %>%
  filter(File == "DonaldJTrump", type == "nomin", Term == 1,
         word.count >= 5) %>%
  select(sentences, anger:trust) %>%
  as.data.frame()

# For every emotion column, show the sentence holding the maximum value
# (which.max picks the first one on ties).
as.character(
  speech.df[["sentences"]][apply(speech.df[, -1], MARGIN = 2, FUN = which.max)]
)
[1] "Once again, France is the victim of brutal Islamic terrorism."
[2] "God bless you, and good night!"
[3] "I have visited the laid-off factory workers, and the communities crushed by our horrible and unfair trade deals."
[4] "Once again, France is the victim of brutal Islamic terrorism."
[5] "God bless you, and good night!"
[6] "Three were killed, and three were very very badly injured."
[7] "God bless you, and good night!"
[8] "God bless you, and good night!"
# Heatmap of the pairwise correlations between the emotion columns (anger
# through trust) over sentences of inaugural speeches. The raw correlations
# are shown unscaled, without colour key, trace lines or density overlay.
# The stray empty argument has been removed, `margins` is spelled out instead
# of relying on partial matching of `margin`, and FALSE replaces F.
heatmap.2(
  cor(sentence.list %>% filter(type == "inaug") %>% select(anger:trust)),
  scale = "none",
  col = bluered(100),
  margins = c(6, 6),
  key = FALSE,
  trace = "none",
  density.info = "none"
)
par(mar = c(4, 6, 2, 1))

# Share of inaugural-speech sentences whose score exceeds 0.01, per emotion
# column (anger through trust). The rows are restricted to type == "inaug" so
# the data match the plot title and the correlation heatmap; previously every
# row of sentence.list was included regardless of speech type.
emo.means <- colMeans(
  select(filter(sentence.list, type == "inaug"), anger:trust) > 0.01
)

# One colour per emotion column, in the column order anger through trust.
col.use <- c("red2", "darkgoldenrod1",
             "chartreuse3", "blueviolet",
             "darkgoldenrod2", "dodgerblue3",
             "darkgoldenrod1", "darkgoldenrod1")

# Horizontal bars sorted by ascending share; the colours are reordered with
# the same permutation so each emotion keeps its own colour.
emo.order <- order(emo.means)
barplot(emo.means[emo.order], las = 2, col = col.use[emo.order],
        horiz = TRUE, main = "Inaugural Speeches")